#Table A7

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(texreg) #generate latex table 
library(estimatr) #estimation

#load main dataset
pvp_data <- read.csv("PvP_data/PvP_data_main.csv")

#transform independent variable
#create binary (hectares > 100) and log + 1 cultivation variables 
pvp_data <- pvp_data %>% 
  mutate(logcult = log(maxcult + 1), cultbinary = ifelse(maxcult > 100, 1, 0))

#subset data to municipalities with prior FARC presence
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

#transform selected weather and soil variables
pvp_farc_subset <- pvp_farc_subset %>% mutate(
  draindummy = as.factor(ifelse(DRAIN == "M", 1, 0)), #moderate drainage
  humiddist = case_when(humid_mean < 85 ~ (85 - humid_mean)^2, humid_mean > 85 ~ (humid_mean - 85)^2, TRUE ~ 0 ), #distance from optimal humidity point
  temprangedist = case_when(maxtemp > 27 ~ (maxtemp - 27)^2, mintemp < 14  ~ (14 - mintemp)^2, TRUE ~ 0), #distance from optimal temp, wide band
)

#instrument for binary independent variable excluding rainfall variable 
pvp_farc_subset$glm_inst_norain <- predict(glm(cultbinary ~ PHAQ + temprangedist + TOTN + TOTC + sun_mean + humiddist + draindummy, pvp_farc_subset, family = binomial(link = "logit")), type = "response")

#instrument for continuous independent variable excluding rainfall variable
pvp_farc_subset$ols_inst_norain <- predict(lm(logcult ~ PHAQ + temprangedist + TOTN + TOTC + sun_mean + humiddist + draindummy, pvp_farc_subset))

#transform control variables for iv models
pvp_farc_subset <- pvp_farc_subset %>% 
  mutate(logcrops = log(maxallcrops + 1), logfloods = log(floodalertmean + 1))

#iv models using iv robust (with and without control variables)
iv_model_cont_norain <- iv_robust(dissident_presence ~ logcult | ols_inst_norain, data = pvp_farc_subset, diagnostics = TRUE) 
iv_model_cont_norain_c <- iv_robust(dissident_presence ~ logcult + logcrops + logfloods | ols_inst_norain + logcrops + logfloods, data = pvp_farc_subset, diagnostics = TRUE) 

iv_model_bin_norain <- iv_robust(dissident_presence ~ cultbinary | glm_inst_norain, data = pvp_farc_subset, diagnostics = TRUE) 
iv_model_bin_norain_c <- iv_robust(dissident_presence ~ cultbinary + logcrops + logfloods | glm_inst_norain + logcrops + logfloods, data = pvp_farc_subset, diagnostics = TRUE) 

#first stage f statistic diagnostics
iv_norain_cont_fstat <- iv_model_cont_norain$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)
iv_norain_c_cont_fstat <- iv_model_cont_norain_c$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)

iv_norain_bin_fstat <- iv_model_bin_norain$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)
iv_norain_c_bin_fstat <- iv_model_bin_norain_c$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)

#print 
texreg(list(iv_model_cont_norain, iv_model_cont_norain_c, iv_model_bin_norain, iv_model_bin_norain_c),
       digits = 2, include.ci = FALSE, single.row = FALSE, include.fstatistic = FALSE, 
       include.rmse = FALSE, include.rsquared = FALSE, include.adjrs = FALSE, 
       include.nobs = TRUE, stars = c(0.001, 0.01, 0.05), caption.above	= TRUE,
       custom.coef.map	= list("(Intercept)" = "(Intercept)", "logcult" = "Coca Cultivation (log ha.)", "cultbinary" = "Coca Cultivation (binary)", "logcrops" = "Agriculture Control", "logfloods" = "Flood Control"),
       custom.gof.rows = list("First Stage F-stat" = c(iv_norain_cont_fstat, iv_norain_c_cont_fstat, iv_norain_bin_fstat, iv_norain_c_bin_fstat)),
       caption = "Production IV: Drop Rain from Weather & Soil Index"
)


